/* This dofile converts the stata-ready data into regression-ready data

Batcher code for parallel execution (see ssc install batcher): 
	batcher F:\MonPol Project\ReplicationPackage\Dofiles\a1_PrepData.do, it(1/2) betweendelay(10) t(C:/StataWD)
 */

* Technicalities
** Run parameters (edited by hand before each run)
global parallel "1"		// number of parallel Stata instances: 1, 2 or 6
global sample ""		// data subset suffix: "", _sample or _10pct
global testRun ""		// "" or 1
global P "PRF"			// price measure: P or PRF

** Price variables and projection horizons (months 1 through 24)
global prices "price_raw price_ref"
numlist "1/24"
global horizons "`r(numlist)'"

** Step switches: a step runs only when its global is set to "1"
global calcNrOfWeeks ""
global prepDispersionFiles ""						// Validate with global sample "_sample" first, then rerun with global sample ""
	global pdf_combineCategories ""
	global pdf_splitPriceFiles ""
	global pdf_calculateWeights ""
	global pdf_laggedControls ""
	global pdf_competitorPrices ""
	global pdf_drawSamples ""
global prepDispersionFilesBySample "1"				// Needs at least global sample "_10pct"
	global pdfbs_hybridPricesAndThresholds ""
	global pdfbs_priceAgeGaps ""
	global pdfbs_finalPrep ""
	global pdfbs_productMarketFrequencies "1"
global logitSamples_10pct ""
global prepAggregateData ""
global combineAggregateData ""

	
** Root folders of the replication package
global rootD "F:\MonPol Project\ReplicationPackage"
global rootL "F:\MonPol Project\ReplicationPackage\Large"

** Helper dofiles (path globals and user-written programs)
qui do "$rootD/Dofiles/999_Paths"
qui do "$rootD/Dofiles/998_Programs"

** Main log
* A single-instance run writes to one shared log; otherwise the log name
* carries the first dofile argument as a suffix.
cap log close log
if "$parallel" == "1" {
	log using "$path_logs/a0_ImportData${sample}.smcl", append smcl name(log)
}
else {
	log using "$path_logs/a0_ImportData${sample}_`1'.smcl", append smcl name(log)
}

** Output folder for the regression-ready data
cap mkdir "$path_regressionData"

** Define categories
* The first dofile argument is the thread number. Without it, or with
* parallel set to 1, this instance handles every category.
if "$parallel" == "1" || "`1'" == "" {
	local categories "beer blades cigets coffee carbbev factiss diapers fzpizza hhclean hotdog coldcer deod fzdinent razors photo paptowl mayo peanbutr margbutr mustketc milk laundet shamp saltsnck spagsauc sugarsub toitisu toothbr toothpa yogurt soup"
}
else if "$parallel" == "2" {
	* Two-way split
	global thread = `1'
	if $thread == 1 {
		local categories "beer blades cigets deod fzpizza hhclean hotdog coldcer soup razors photo paptowl mayo peanbutr margbutr mustketc milk"
	}
	if $thread == 2 {
		local categories "yogurt spagsauc sugarsub toitisu toothbr toothpa coffee laundet saltsnck shamp factiss carbbev diapers fzdinent"
	}
}
else if "$parallel" == "6" {
	* Six-way split
	global thread = `1'
	if $thread == 1 {
		local categories "beer blades cigets deod"
	}
	if $thread == 2 {
		local categories "fzpizza hhclean hotdog coldcer"
	}
	if $thread == 3 {
		local categories "soup razors photo paptowl mayo peanbutr margbutr mustketc milk"
	}
	if $thread == 4 {
		local categories "yogurt spagsauc sugarsub toitisu toothbr toothpa"
	}
	if $thread == 5 {
		local categories "coffee laundet saltsnck shamp"
	}
	if $thread == 6 {
		local categories "factiss carbbev diapers fzdinent"
	}
}

* Threaded runs only: stop when the thread number maps to no categories,
* otherwise give this instance its share of the machine's processors.
if inlist("$parallel", "2", "6") & "`1'" != "" {
	if "`categories'" == "" {
		noisily di "Not assigned to category"
		exit
	}
	local maxProcessors = ceil(c(processors_mach) / $parallel)
	di "Limiting to `maxProcessors' per Stata instance"
	set processors `maxProcessors'
}



* Code
** Calculate number of weeks in each month
* Writes two lookup files: one row per week and one row per month, both
* carrying nr_of_weeks.
if "$calcNrOfWeeks" == "1" {
	* The razors price file serves as the example dataset
	use "$path_output/completePriceData/razors.dta", clear
	
	* Keep one row per week, then count the weeks falling in each month
	gegen tag = tag(week)
	keep if tag == 1
	bysort month: egen nr_of_weeks = count(week)
	keep week nr_of_weeks
	
	* Weekly lookup
	save "$path_intermediate/NrOfWeeks.dta", replace
	
	* Monthly lookup: rebuild the month from the week date, keep one row per month
	gen month = mofd(week)
	format month %tm
	duplicates drop month, force
	drop week
	save "$path_intermediate/NrOfWeeks_m.dta", replace
}

** Prepare price dispersion files
if "$prepDispersionFiles" == "1" {
	* Start log
    cap log close prepDispersionFiles
	log using "$path_logs/a900_prepDispersionFiles${sample}.log", replace text name(prepDispersionFiles)	
	
	if "${pdf_combineCategories}" == "1" {
		* Purpose: stack the per-category reset-price files into one panel, build a
		* globally unique product-store id, compute price changes and lagged price
		* gaps, and save the result as allResetPrices${sample}.
		
		* Start log (close a stale handle first so a rerun in the same session works)
		cap log close pdf_combineCategories
		log using "$path_logs/a900_pdf_combineCategories${sample}.log", replace text name(pdf_combineCategories)
	  
		* Append data
		clear	
			
		gen category = ""
		
		local allFiles: dir "$path_output/resetPrices${sample}" files "*.dta", respectcase
		foreach file of local allFiles {
			* Category name = file name without the resetPrices_ prefix and .dta suffix
			local category = subinstr("`file'", "resetPrices_", "", .)
			local category = subinstr("`category'", ".dta", "", .)
			di "`category'"
			append using "$path_output/resetPrices${sample}/resetPrices_`category'", nolabel keep(id_nr month P PRF P_reset PRF_reset)

			* Add category var [*]
			* Only the rows appended just now still have an empty category
			replace category = "`category'" if missing(category)
		}
		
		* Encode category var (programs loaded from the 998_Programs dofile)
		encodeCategories
		defineCategoryLabel
		label values category categoryLabel
			
		* Unique ID number
		* The assert guarantees id_nr fits below the 1e8 offset used per category
		sum id_nr, meanonly
		di r(max)
		assert r(max) < 1e8
		gen double id_nr_unique = id_nr + 1e8*category
		format id_nr_unique %15.1f
		
		* Drop if some price is missing
		drop if missing(P, PRF) == 1
		
		* Calculate price change and price change size
		bysort id_nr_unique (month): gen double DL_P = P - P[_n-1]
		bysort id_nr_unique (month): gen double DL_PRF = PRF - PRF[_n-1]
		
		* Change indicator: absolute change above 0.0001 and defined
		gen byte D_P = abs(DL_P) > 0.0001 & DL_P != .
		gen byte D_PRF = abs(DL_PRF) > 0.0001 & DL_PRF != .
		
		* Lagged price gap
		gen double x_P = P - P_reset
		gen double x_PRF = PRF - PRF_reset
		
		xtset id_nr_unique month
		gen double L_x_P = L.x_P
		gen double L_x_PRF = L.x_PRF
		
		drop x_P x_PRF
		
		** Gap too large
		* gap is missing on the first row of each id; missing compares as larger
		* than any number, so those rows are dropped here as well
		bysort id_nr_unique (month): gen gap = month - month[_n-1]	
		gen valid1 = (gap <= 1)
		drop if valid1 == 0
		drop gap valid1
		
		* Calculate average store gap
		** Add in store information [*]
		merge m:1 category id_nr using "$path_intermediate${sample}/crosswalk_store_idnr/crosswalk_store_idnr", keep(match master) nogen keepusing(upc_code iri_key)
		
		** Calculate mean (excl self)
		** Trick: mean excl self = (sum - self)/(count - 1)
		foreach var of varlist L_x* {
			bysort category iri_key month: gegen sum_`var' = total(`var')
			bysort category iri_key month: gegen count_`var' = count(`var')
			gen `var'_bar = (sum_`var' - `var') / (count_`var' - 1)
			label variable `var'_bar "Average gap in store, excl. current observation"
			drop sum_`var' count_`var'
		}


		* Save
		drop id_nr
		save "$path_regressionData/allResetPrices${sample}", replace
		
		* Close log
		log close pdf_combineCategories
	}
	
	if "$pdf_splitPriceFiles" == "1" {
		* Purpose: split allResetPrices${sample} into one file per price measure
		* (P and PRF), keeping only that measure's variables.
		
		* Start log (close a stale handle first, e.g. one left by an aborted run)
		cap log close pdf_splitPriceFiles
		log using "$path_logs/a900_pdf_splitPriceFiles${sample}.log", replace text name(pdf_splitPriceFiles)
		
		* Split file
		foreach price in P PRF {
			use "$path_regressionData/allResetPrices${sample}", clear
			keep category month `price' `price'_reset id_nr_unique upc_code DL_`price' D_`price' L_x_`price' iri_key L_x_`price'_bar
			
			* Shrink storage types before saving
			recast int month 
			recast byte category
			save "$path_regressionData/allResetPrices_`price'${sample}", replace
		}
		
		* Close log
		log close pdf_splitPriceFiles
	}

	if "$pdf_calculateWeights" == "1" {
		* Yearly revenue per id, used later as regression weights.
		
		* Start log
		cap log close pdf_calculateWeights
		log using "$path_logs/a900_pdf_calculateWeights${sample}.log", replace text name(pdf_calculateWeights)
		
		if !inlist("${sample}", "", "_sample") {
			di "Only executed for full and _sample sample"
		}
		else {
			cap mkdir "$path_intermediate${sample}/selection_usedIdnrs"
			cap mkdir "$path_intermediate${sample}/selection_revYwIdnr"
			
			* Step 1: list of id-month pairs present in the PRF regression data
			use "$path_regressionData/allResetPrices_PRF${sample}", replace
			keep id_nr_unique month
			save "$path_intermediate${sample}/selection_usedIdnrs/usedIdnrs", replace
		
			* Step 2: yearly revenue per id, one file per category
			foreach cat of local categories {
				noisily di as result "`cat': `c(current_time)'"
				
				* Expenditures, summed to id-month level
				use id_nr dollar month using "$path_output/completePriceData${sample}/`cat'", clear
				gcollapse (sum) dollar, by(id_nr month) fast
				
				* Numeric category code and the matching unique id
				gen categoryS = "`cat'"
				makeCategoryNumeric2
				gen double id_nr_unique = id_nr + 1e8*category
				format id_nr_unique %15.1f
				drop id_nr
				
				* Restrict to the id-month pairs listed in step 1
				merge m:1 id_nr_unique month using "$path_intermediate${sample}/selection_usedIdnrs/usedIdnrs", keep(match)
				
				* Sum revenue within id and calendar year
				gen year = yofd(dofm(month))
				gcollapse (sum) dollar , by(category id_nr_unique year)
				rename dollar totRevYw_idnr
				
				save "$path_intermediate${sample}/selection_revYwIdnr/revYwIdnr_`cat'", replace
			}
			
			* Step 3: stack the per-category files into one
			clear
			foreach cat of local categories {
				append using "$path_intermediate${sample}/selection_revYwIdnr/revYwIdnr_`cat'", nolabel
			}
			save "$path_intermediate${sample}/selection_revYwIdnr/revYwIdnr_allCats", replace
		}
		
		* Close log
		log close pdf_calculateWeights
	}
	
	if "$pdf_laggedControls" == "1" {
		* Purpose: build the file of aggregate US controls. Loads US_variables.dta,
		* merges in the PPI series, seasonally adjusts the PPI log difference,
		* stores residuals of two ebpnew regressions, adds lags 1 to 12 of the
		* controls and saves everything to $path_intermediate/US_variables.dta.
		* The log and output names carry no ${sample} suffix.
		
		* Start log
		cap log close pdf_laggedControls
		log using "$path_logs/a900_pdf_laggedControls.log", replace text name(pdf_laggedControls)
		
		* Load data
		use "$rootL/External Data/US_variables.dta", clear
		
		* Merge in PPI data
		* The time variable is temporarily renamed so it matches the merge key
		rename dm month
		merge 1:1 month using "$rootL/External Data/ext_PPI_ACO", keep(match master) nogen
		rename ext_PPI_ACO_level ppi
		drop ext_*
		
		* Seasonally adjust ppi
		* Log level times 100 and its first difference.
		* NOTE(review): the D. and L. operators need time-series settings; they
		* presumably come with the loaded file - confirm.
		rename month dm
		foreach var of varlist ppi {
			gen l`var'=log(`var')*100
			gen dl`var'=D.l`var'
			local temp : var label `var'
			label var l`var' "`temp'"
			label var dl`var' "`temp'"
		}
		
		** Seasonal adjustment
		* One dummy per calendar month; the estimation window is 1981-2018.
		* The local macro named sample holds that window condition and is
		* unrelated to the global of the same name.
		gen month_nr = month(dofm(dm))
		tab month_nr, gen(month_)
		local sample "year(dofm(dm))>=1981 & year(dofm(dm))<=2018"

		foreach var of varlist dlppi {
			gen `var'_sa= .
			* Regression on the twelve month dummies without a constant
			qui regress `var' month_1-month_12 if `sample', noconst
			* Step 1 stores the average of the twelve coefficients; step 2 adds the
			* series itself and subtracts the coefficient of the row's own month.
			qui replace `var'_sa=(_b[month_1]+_b[month_2]+_b[month_3]+_b[month_4]+_b[month_5]+_b[month_6]+_b[month_7]+_b[month_8]+_b[month_9]+_b[month_10]+_b[month_11]+_b[month_12])/12 if `sample'                   // Add average coefficient
			qui replace `var'_sa=`var'+`var'_sa -_b[month_1]*month_1-_b[month_2]*month_2-_b[month_3]*month_3-_b[month_4]*month_4-_b[month_5]*month_5-_b[month_6]*month_6-_b[month_7]*month_7-_b[month_8]*month_8-_b[month_9]*month_9-_b[month_10]*month_10-_b[month_11]*month_11-_b[month_12]*month_12 if `sample'             // Subtract seasonal component
		}
				
		drop month_*

		* Rebuild a level series as the running sum of the adjusted differences;
		* entries equal to zero are set to missing
		foreach var in ppi_sa {
			gen l`var' = sum(dl`var')
			replace l`var' =. if l`var'==0
		} 
		rename dm month
		drop lppi dlppi dlppi_sa

		* Generate ebp_residual
		* Residuals of ebpnew on six lags of the controls, once with lcpi and
		* once with the adjusted PPI level
		reg ebpnew L(1/6).(gs1 lip lcpi ebpnew)
		predict ebpnew_residual, r
		
		reg ebpnew L(1/6).(gs1 lip lppi_sa ebpnew)
		predict ebpnew_residualP, r
		
		* Generate lagged values
		* Lags 1 to 12 as explicit variables named L<i>_<var>
		forvalues i = 1/12 {
			foreach var of varlist gs1 ebpnew lip lcpi lppi_sa {
				gen L`i'_`var' = L`i'.`var'
			}
		}
		
		* Clean up and save
		save "$path_intermediate/US_variables.dta", replace
		
		* Close log
		log close pdf_laggedControls
	}
	
	if "$pdf_competitorPrices" == "1" {
		* Purpose: for the price measure in global P, compute each observation's
		* gap to the average price of the same product in the same month (excluding
		* itself), demean it by store and category, lag it, and save the result as
		* allCompetitorPrices_${P}${sample}.
		
		* Start log
		cap log close pdf_competitorPrices
		log using "$path_logs/a900_pdf_competitorPrices${P}.log", replace text name(pdf_competitorPrices)
		
		* Load data [*]
		use "$path_regressionData/allResetPrices_${P}${sample}", clear
		keep category month ${P} id_nr_unique upc_code iri_key L_x_${P}
		
		* Calculate monthly mean prices for each product (excl self)
		* count_${P} is kept on purpose; only the total is dropped
		bysort category month upc_code: gegen total_${P} = total(${P})
		bysort category month upc_code: gegen count_${P} = count(${P})
		gen meanOthers_${P} = (total_${P} - ${P})/(count_${P} - 1)
		drop total_${P}
		
		* Calculate deviation from competitor prices
		gen gapComp_${P} = ${P} - meanOthers_${P}
		drop meanOthers_${P}

		* Demean by store average
		bysort iri_key category: gegen mean_gapComp_${P} = mean(gapComp_${P})
		gen gapCompDemeanedStCat_${P} = gapComp_${P} - mean_gapComp_${P}
		drop mean_gapComp_${P}
		
		* Lag
		* The panel id is spelled out in full: the only id variable kept above is
		* id_nr_unique, and the short form id_nr works only through variable
		* abbreviation.
		xtset id_nr_unique month, noquery
		sort id_nr_unique month
		
		gen L_xComp_${P} = L.gapComp_${P}
		gen L_xCompDem_${P} = L.gapCompDemeanedStCat_${P}
		
		label variable L_xComp_${P} "Lagged gap own price vs average competitor price"
		label variable L_xCompDem_${P} "Lagged gap own price vs average competitor price. Demeaned by store X cat."
		
		* Price change indicator
		gen DL_${P} = D.${P}
		gen D_${P} = abs(DL_${P}) > 0.0001 & DL_${P} != .
		
		* Save
		save "$path_regressionData/allCompetitorPrices_${P}${sample}", replace
		
		* Close log
		log close pdf_competitorPrices
	}
	
	if "$pdf_drawSamples" == "1" {
		* Purpose: draw nested 10% and 1% samples of ids from the full PRF
		* reset-price file, store the sampled id lists, and cut the full PRF
		* competitor-price file down to the same ids.
		
		* Start log
		cap log close pdf_drawSamples
		log using "$path_logs/a900_pdf_drawSamples.log", replace text name(pdf_drawSamples)
		cap mkdir "$path_intermediate/samples"
		
		* Draw Sample
		** Reset-PRF
		* Frames are dropped one at a time so that neither drop depends on the
		* other frame existing
		cap frame drop tenpercent
		cap frame drop onepercent
		use "$path_regressionData/allResetPrices_PRF", clear
		
		*** 10%: sample whole ids, then store the list of sampled ids
		gsample 10, cluster(id_nr_unique) wor percent alt nopreserve
		save "$path_regressionData/allResetPrices_PRF_10pct", replace
		frame put id_nr_unique, into(tenpercent)
		frame tenpercent: gduplicates drop id_nr_unique, force
		frame tenpercent: save "$path_intermediate/samples/sample_10pct", replace
		
		*** 1%: a further 10% of the ids in the 10% sample
		gsample 10, cluster(id_nr_unique) wor percent alt nopreserve
		save "$path_regressionData/allResetPrices_PRF_1pct", replace
		frame put id_nr_unique, into(onepercent)
		frame onepercent: gduplicates drop id_nr_unique, force
		frame onepercent: save "$path_intermediate/samples/sample_1pct", replace
		
		
		** Comp-PRF
		* frlink leaves the link variable missing for ids absent from the sample
		* frame; those rows are dropped. The 1% cut is applied to the 10% cut.
		use "$path_regressionData/allCompetitorPrices_PRF", clear
		frlink m:1 id_nr_unique, frame(tenpercent)
		drop if missing(tenpercent)
		drop tenpercent
		save "$path_regressionData/allCompetitorPrices_PRF_10pct", replace
		frlink m:1 id_nr_unique, frame(onepercent)
		drop if missing(onepercent)
		drop onepercent
		save "$path_regressionData/allCompetitorPrices_PRF_1pct", replace
		
		* Close log
		log close pdf_drawSamples
	}
	
	* Close log
    log close prepDispersionFiles
}

if "$prepDispersionFilesBySample" == "1" {
	* Start log
    cap log close prepDispersionFilesBySample
	log using "$path_logs/a900_prepDispersionFilesBySample${sample}.log", replace text name(prepDispersionFilesBySample)	
	local majorsample = cond("$sample" == "_sample", "_sample", "")
	
	if "$pdfbs_hybridPricesAndThresholds" == "1" {
		* Hybrid gap: own PRF minus the average PRF of the other observations of
		* the same product and month whose price changed.
		
		* Start log
		cap log close pdfbs_hybridPricesAndThresholds
		log using "$path_logs/900_pdfbs_hybridPricesAndThresholds${sample}.log", replace text name(pdfbs_hybridPricesAndThresholds)
		
		* Input: competitor price file for PRF
		use "$path_regressionData/allCompetitorPrices_PRF${sample}", replace
		
		* Leave-one-out mean price among price changers, and the gap to it
		gegen int count_DPRF = total(D_PRF), by(category month upc_code)
		gegen total_DPRF = total(PRF*D_PRF), by(category month upc_code)
		gen meanOthers_DPRF = (total_DPRF - PRF*D_PRF)/(count_DPRF - D_PRF)
		gen gapHybrid_DPRF = PRF - meanOthers_DPRF
		drop total_DPRF meanOthers_DPRF
		
		* Remove the store-category average of the gap
		gegen mean_gapHybrid_DPRF = mean(gapHybrid_DPRF), by(iri_key category)
		gen gapHybridDemeanedStCat_DPRF = gapHybrid_DPRF - mean_gapHybrid_DPRF
		drop mean_gapHybrid_DPRF
		
		* One-month lag of the demeaned gap
		gen L_xHybridDem_DPRF = L.gapHybridDemeanedStCat_DPRF
		
		* Turn both counts into counts that exclude the observation itself
		replace count_PRF = count_PRF - 1
		replace count_DPRF = count_DPRF - D_PRF
		
		* Reduce to the output variables, document and save
		keep id_nr_unique month D_PRF DL_PRF count_PRF count_DPRF L_xHybridDem_DPRF
		recast int count_PRF, force
		note: "The hybrid gap is the average level of all price changing competitors. It is demeaned by the store-category average gap and lagged one month."
		save "$path_regressionData/allHybridPrices_DPRF${sample}", replace
		
		* Close log
		log close pdfbs_hybridPricesAndThresholds
	}

	if "$pdfbs_priceAgeGaps" == "1" {
		* Price age: months elapsed since the last observed price change, lagged
		* one month, in levels and in logs.
		
		* Start log
		cap log close pdfbs_priceAgeGaps
		log using "$path_logs/900_pdfbs_priceAgeGaps${P}.log", replace text name(pdfbs_priceAgeGaps)
		
		* Input: reset price file for the chosen price measure
		use "$path_regressionData/allResetPrices_${P}${sample}", clear
		keep id_nr_unique month ${P} D_${P}
		
		* Month of the most recent price change, carried forward within id
		gsort id_nr_unique month
		gen DP_month = month if D_${P} == 1
		by id_nr_unique (month): replace DP_month = DP_month[_n-1] if missing(DP_month) & !missing(DP_month[_n-1])
		
		* Age counts the change month itself as 1
		gen priceAge = month - DP_month + 1
		gen L_xPriceAge = L.priceAge
		gen L_xLnPriceAge = ln(L_xPriceAge)
		
		* Keep ids, the price and the lagged age measures
		keep id_nr_unique month ${P} L_x*
		save "$path_regressionData/allPriceAgePrices_${P}${sample}", replace
		
		* Close log
		log close pdfbs_priceAgeGaps
	}

	** Final preparation
	* Adds the instrument, signed and multi-horizon price change variables and
	* revenue weights to the reset and competitor files, then merges reset gaps,
	* competitor gaps, price ages and US variables into one combined file.
	if "$pdfbs_finalPrep" == "1" {
		* Start log
		cap log close pdfbs_finalPrep
		log using "$path_logs/2_pdfbs_finalPrep.log", replace text name(pdfbs_finalPrep)
		
		* Part 1: prepare each gap file separately
		local price ${P}
		foreach gap in Reset Competitor {
			use "$path_regressionData/all`gap'Prices_`price'${sample}", replace
			
			* Instrument in levels
			merge m:1 month using "$rootL/External Data/Karadi2017.dta", keepusing(FF4_alt1_norm) keep(match master) nogen
			
			* Keep the 0/1 indicator as D01 and give D the sign of the change
			sort id_nr_unique month
			gen D01_`price' = D_`price'
			replace D_`price' = -D_`price' if DL_`price' < 0
			
			* Price changes over 3, 12 and 24 months, measured from last month's price.
			* Because invalid rows may already have been dropped, computing these in
			* PrepData would be better. No D01 variable is built for the 24 month horizon.
			foreach h in 3 12 24 {
				local lead = `h' - 1
				gen DL_`price'_h`h' = F`lead'.`price' - L.`price'
				gen D_`price'_h`h' = sign(DL_`price'_h`h')
				if `h' != 24 {
					gen D01_`price'_h`h' = abs(D_`price'_h`h')
				}
			}

			* The one-month variables get the h1 suffix
			rename (D_`price' D01_`price' DL_`price') (D_`price'_h1 D01_`price'_h1 DL_`price'_h1)
			
			* Weights: yearly revenue of the id
			gen year = yofd(dofm(month))
			merge m:1 id_nr_unique year using "$path_intermediate`majorsample'/selection_revYwIdnr/revYwIdnr_allCats", keepusing(totRevYw_idnr) keep(match master) nogen
			rename totRevYw_idnr weight
			label variable weight "Total revenue of id_nr that year"
			
			save "$path_regressionData/all`gap'Prices_`price'${sample}_prepped", replace
		}
	
		* Part 2: combined file
		** Start from the prepared reset gaps
		use "$path_regressionData/allResetPrices_${P}${sample}_prepped", replace
		
		** Competitor gaps, matched rows only
		merge 1:1 id_nr_unique month using "$path_regressionData/allCompetitorPrices_${P}${sample}", keepusing(L_xCompDem_${P}) keep(match) nogen
				
		** Price ages (this merge leaves its _merge variable in the data)
		merge 1:1 id_nr_unique month using "$path_regressionData/allPriceAgePrices_${P}${sample}", keepusing(L_xLnPriceAge L_xPriceAge)
		
		** US variables, of which only lags 1 to 6 are retained
		merge m:1 month using "$path_intermediate/US_variables.dta", keep(match master) nogen
		drop L7_* L8_* L9_* L10_* L11_* L12_*
			
		** Calendar month number
		gen month12 = month(dofm(month))
		sort id_nr_unique month
		
		** Save
		save "$path_regressionData/allResetCompPriceAgePrices_${P}${sample}_prepped", replace
		
		* Close log
		log close pdfbs_finalPrep
	}
	
	**# pdfbs_productMarketFrequencies
	* Purpose: build a store-to-market crosswalk, then compute for the PRF 10pct
	* prepped file (hard-coded input, independent of the sample and P globals)
	* normalised and standardised competitor gaps and leave-one-out frequencies
	* of 24-month price increases and decreases, at product-market and id level.
	if "$pdfbs_productMarketFrequencies" == "1" {
		* Start log (this log is appended to rather than replaced)
		cap log close pdfbs_productMarketFrequencies
		log using "$path_logs/pdfbs_productMarketFrequencies.log", append text name(pdfbs_productMarketFrequencies)
		cap mkdir "$path_output/frequencyVariables"
		cap mkdir "$path_intermediate/storeMarketCrosswalk"
		cap mkdir "$path_intermediate/idnrProductCrosswalk"
		cap mkdir "$path_intermediate/idnrProductCrosswalk/byCategory"

		* Store-market crosswalk
		** Combine yearly info
		clear
		local files : dir "$path_input/storeData" files "store*.dta", respectcase
		foreach file of local files {
			append using "$path_input/storeData/`file'"
		}
		
		** Reduce to market-store crosswalk
		** A store listed under several markets keeps the first market in sort order
		keep iri_key market week
		gduplicates drop iri_key market, force
		bysort iri_key (market): gen n = _n
		drop if n > 1
		drop n week
		gunique iri_key
		encode market, gen(marketID)
		drop market
		compress
		
		** Save
		save "$path_intermediate/storeMarketCrosswalk/storeMarketCrosswalk", replace
		

		* Calculate product market level frequencies
		** Load data
		use "$path_regressionData/allResetCompPriceAgePrices_PRF_10pct_prepped", clear
		drop _merge
		
		** Add market and product info
		merge m:1 iri_key using "$path_intermediate/storeMarketCrosswalk/storeMarketCrosswalk", keep(match master) keepusing(marketID) nogen
		
		** Product id unique across categories.
		** Stored as double: values of 1e8 and above are beyond what the default
		** float type represents exactly, so distinct products could collapse
		** into one group.
		gen double upc_unique = upc_code + 1e8*category
		
		** Normalised gap measures
		** Mean absolute gap in the product-market, excluding the own store
		bysort	marketID upc_unique: gegen total_gapC = total(abs(L_xCompDem_PRF))
		by		marketID upc_unique: gegen count_gapC = count(L_xCompDem_PRF)
		bysort 	marketID upc_unique iri_key: gegen total_gapCS = total(abs(L_xCompDem_PRF))
		by		marketID upc_unique iri_key: gegen count_gapCS = count(L_xCompDem_PRF)
		
		gen mean_gapC = (total_gapC - total_gapCS)/(count_gapC - count_gapCS)
		gen L_xCompDem_PRF_norm = L_xCompDem_PRF/mean_gapC
		label variable L_xCompDem_PRF_norm "Normalised comp gap (see note)"
		note L_xCompDem_PRF_norm: "Comp gap, demeaned by average absolute product-market gap (excl own store)"
		
		** Standardised gap measures
		** statnoself is a user-written program; the next lines use the meanS and
		** sdS variables, presumably created by it - confirm in 998_Programs
		bysort id_nr_unique (month): statnoself L_xCompDem_PRF
		gen L_xCompDem_PRF_stand = (L_xCompDem_PRF - meanS)/sdS
		drop meanS sdS
		
		sum L_xCompDem_PRF*
		
		
		** Frequency measures
		*** Up/down indicators (missing when the 24 month direction is missing)
		**** Prices were increased
		gen D_PRF_up_h24 = 1 		if D_PRF_h24 == 1
		replace D_PRF_up_h24 = 0 	if inlist(D_PRF_h24, 0, -1)
		
		**** Prices were decreased
		gen D_PRF_down_h24 = 1 	if D_PRF_h24 == -1
		replace D_PRF_down_h24 = 0 if inlist(D_PRF_h24, 0, 1)
		
		*** Means (leave self out): (total - own) / (count - 1)
		**** Product-Market
		foreach dir in up down {
			gegen PM_total_`dir'_h24 = total(D_PRF_`dir'_h24), by(upc_unique marketID)
			gegen PM_count_`dir'_h24 = count(D_PRF_`dir'_h24), by(upc_unique marketID)
			gen PM_mean_`dir'_h24 = (PM_total_`dir'_h24 - D_PRF_`dir'_h24)/(PM_count_`dir'_h24 - 1)
		}
		
		**** Within id
		foreach dir in up down {
			gegen ID_total_`dir'_h24 = total(D_PRF_`dir'_h24), by(id_nr_unique)
			gegen ID_count_`dir'_h24 = count(D_PRF_`dir'_h24), by(id_nr_unique)
			gen ID_mean_`dir'_h24 = (ID_total_`dir'_h24 - D_PRF_`dir'_h24)/(ID_count_`dir'_h24 - 1)
		}

		drop *total* *count*
		sum PM_mean* ID_mean*
		
		** Save
		keep id_nr_unique month PM_mean* ID_mean* L_xCompDem_PRF_norm L_xCompDem_PRF_stand 
		label data "Product-Market level frequencies, up/down, h24"
		save "$path_output/frequencyVariables/productMarket_10pct", replace		
	
		
		* Close log
		log close pdfbs_productMarketFrequencies
	}

	
	* Close log
    log close prepDispersionFilesBySample
}

* Nested subsamples for the logit estimations: each file holds 10 percent of
* the ids of the previous one, starting from the 10pct prepped file.
if "$logitSamples_10pct" == "1" {
	* Start log
	cap log close logitSamples_10pct
	log using "$path_logs/900_logitSamples_10pct.log", replace text name(logitSamples_10pct)
	
	* First draw: 10 percent of the ids in the 10pct file
	use "$path_regressionData/allResetCompPriceAgePrices_${P}_10pct_prepped", replace
	gsample 10, cluster(id_nr_unique) percent wor
	xtset id_nr_unique month
	
	* Variables needed by the logit models
	keep id_nr_unique month D_PRF_h24 L_xCompDem_PRF ebpnew ebpnew_residual L_xLnPriceAge L*_lip L*_lcpi L*_gs1 L*_ebpnew gs1 lip lcpi month12
	
	* Increase and decrease indicators; missing unless the direction is -1, 0 or 1
	gen D_PRF_up_h24 = (D_PRF_h24 == 1) if inlist(D_PRF_h24, -1, 0, 1)
	gen D_PRF_down_h24 = (D_PRF_h24 == -1) if inlist(D_PRF_h24, -1, 0, 1)
	
	* Month dummies, interaction term and value label for the direction
	tab month12, gen(month12_)
	gen interactionCompEbpR = L_xCompDem_PRF*ebpnew_residual
	label define upNoDown -1 "Down" 0 "No change" 1 "Up", replace
	label values D_PRF_h24 upNoDown
	
	* Save the first draw, then keep drawing 10 percent of what is left
	save "$path_regressionData/logitData_1pct", replace
	foreach frac in 01pct 001pct 0001pct 00001pct {
		gsample 10, cluster(id_nr_unique) percent wor
		save "$path_regressionData/logitData_`frac'", replace
	}
	
	
	* Close log
	log close logitSamples_10pct
}

** Prepare aggregate data for local projections etc
if "$prepAggregateData" == "1" {
    * Start log
	cap mkdir "$path_intermediate${sample}/PriceSeries"
	cap mkdir "$path_intermediate${sample}/PriceSeries/temp"
	cap mkdir "$path_intermediate${sample}/Revenue Stitch (long)"
	
	* Loop over categories
	foreach category of local categories {
		* Reduce to monthly
		** Load data
		use "$path_output/completePriceData${sample}/`category'", clear
		local category = subinstr("`category'", ".dta", "", .)
		
		noisily di _newline _col(3) as text "`category': `c(current_time)'" _newline
		rename price price_raw
		
		** Collapse to months	// If we revert to means, remove everything between this and the *gcollapse command.
		*** Prices
		qui foreach price of global prices {
			noisily di "`price'"
			bysort id_nr month: egen double `price'Mode = mode(`price'), maxmode
		}
		
		*** Dollars
		bysort id_nr month: egen double dollarSum = total(dollar)
		
		*** Collapse
		gegen tag = tag(id_nr month)
		drop if tag == 0
		drop tag
		drop $prices dollar
		
		*** Rename
		rename *Mode *
		rename *Sum *
		keep id_nr upc_code month $prices iri_key dollar
			
		
		* Calculate yearly expenditure
		gen year = yofd(dofm(month))
		bysort id_nr year: egen totRev_year = total(dollar)
		
		* Adjust monthly expenditure for #of weeks in the month
		merge m:1 month using "$path_intermediate/NrOfWeeks_m", nogen keep(match master)
		replace dollar = dollar / nr_of_weeks * 52/12
		drop nr_of_weeks
		
		* Calculate monthly price series (id-level)
		** Gap indicator
		bysort id_nr (month): gen gap = month - month[_n-1]
		bysort id_nr		: egen maxGap = max(gap)
		bysort id_nr 		: gen maxGap_forCount = maxGap if _n == 1
		
		gen byte validDrop = (maxGap == 1)
		gen byte valid1 = (gap == 1)
		gen byte valid3 = (gap <= 3)
		
		** Retain indicator
		bysort id_nr 		: gen end = month[_N]
		bysort id_nr 		: gen start = month[1]
		gen int endYear = yofd(dofm(end))
		gen int startYear = yofd(dofm(start))
		gen int startMonth = month(dofm(start))
		gen int endMonth = month(dofm(end))

		gen byte retain = (year < endYear	& year > startYear)														// Retain if ... the product started selling before the current year and stopped after	// "the middle"
		replace retain = 1 if year == endYear				& endMonth 	== 12	& year > startYear 					// ... the product started selling in January this year									// "the tail"
		replace retain = 1 if year == startYear			  	& startMonth 	== 1	& year < endYear				// ... the product stopped selling in December this year								// "the head"
		replace retain = 1 if endYear == startYear 			& startMonth 	== 1	& endMonth == 12
		
		** Trim last month of regular and reference price (per id_nr)
		replace price_ref = . if month == end

		** Log differences (~ relative price changes)
		noisily di as result "Calculating price measures"
		qui foreach price of global prices {
			noisily di as result _col(3) "... `price'"
			bysort id_nr (month): gen DL_`price' = log(`price') - log(`price'[ _n-1])
			bysort id_nr (month): gen DLu_`price' = (log(`price') - log(`price'[ _n-1])) * ((log(`price') - log(`price'[ _n-1])) > 0.01)		// Second part restricts to increases, 0 otherwise
			bysort id_nr (month): gen DLd_`price' = (log(`price') - log(`price'[ _n-1])) * ((log(`price') - log(`price'[ _n-1])) < -0.01)		// Second part restricts to decreases, 0 otherwise
			bysort id_nr (month): gen byte DP_`price' = abs(`price' - `price'[_n-1])>0.01 if ~missing(`price'[_n-1], `price')
			bysort id_nr (month): gen byte DPu_`price' = (`price' - `price'[_n-1])>0.01 if ~missing(`price'[_n-1], `price')
			bysort id_nr (month): gen byte DPd_`price' = (`price' - `price'[_n-1])<-0.01 if ~missing(`price'[_n-1], `price')
			
			gen DL_`price'_v1_r = DL_`price' 	if valid1 == 1		& retain == 1
			gen DLu_`price'_v1_r = DLu_`price' 	if valid1 == 1		& retain == 1
			gen DLd_`price'_v1_r = DLd_`price' 	if valid1 == 1		& retain == 1
			gen byte DP_`price'_v1_r = DP_`price' 	if valid1 == 1		& retain == 1
			gen byte DPu_`price'_v1_r = DPu_`price' 	if valid1 == 1		& retain == 1
			gen byte DPd_`price'_v1_r = DPd_`price' 	if valid1 == 1		& retain == 1
			
			rename DL_`price' DL_`price'_all
			rename DLu_`price' DLu_`price'_all
			rename DLd_`price' DLd_`price'_all
			rename DP_`price' DP_`price'_all
			rename DPu_`price' DPu_`price'_all
			rename DPd_`price' DPd_`price'_all
		}
		
		** Micro frequency and sizes
		*** Calculate micro measures
		* For each horizon h the price h-1 months ahead is compared with the price one
		* month back, for retained observations only. The resulting indicators and log
		* changes are averaged by month (weighted by the horizon-average yearly revenue)
		* in a scratch frame and written to one temp file per category and horizon.
		noisily di as result "Calculating and aggregating micro measures over horizon"
		cap frame drop horizonData
		frame create horizonData
		global prices_limited "price_ref"
		xtset id_nr month, noquery
		foreach h of global horizons {
			noisily di as result _col(3) "... h = `h'"
			local hM1 = `h' - 1
			local hTag "h`h'_r"
			
			qui foreach price of global prices_limited {
				* Building blocks shared by all six measures
				local pAhead "F`hM1'.`price'"
				local pBack "L.`price'"
				local gap "(`pAhead' - `pBack')"
				local logGap "(log(`pAhead') - log(`pBack'))"
				local usable "~missing(`pBack', `pAhead') & retain == 1"
				
				* Indicators: any change / increase / decrease beyond one cent
				gen byte mDP_`price'_`hTag' = abs`gap' > 0.01 if `usable'
				gen byte mDPu_`price'_`hTag' = `gap' > 0.01 if `usable'
				gen byte mDPd_`price'_`hTag' = `gap' < -0.01 if `usable'
				label var mDP_`price'_`hTag' "Price change observed between t+h-1 and t-1, micro level"
				
				* Log size of the change, overall and split by direction
				gen mDL_`price'_`hTag' = `logGap' if `usable'
				gen mDLu_`price'_`hTag' = `logGap'*(`gap' > 0.01) if `usable'
				gen mDLd_`price'_`hTag' = `logGap'*(`gap' < -0.01) if `usable'
				label var mDL_`price'_`hTag' "Price change size between t+h-1 and t-1, micro level"
			}
			
			* Weights: mean of yearly revenue at both ends of the horizon
			qui gen totRev_year_h`h' = (F`hM1'.totRev_year + L.totRev_year)/2
			label var totRev_year_h`h' "Average of totRev_year at beginning and end of horizon"
			
			* Aggregated [*]
			local microVars "mDP*_`hTag' mDL*_`hTag'"
			cap frame drop horizonFrame
			frame put `microVars' totRev_year_h`h' month, into(horizonFrame)
			qui frame horizonFrame {
				gcollapse (mean) `microVars' [iw=totRev_year_h`h'], by(month) fast
				
				* Average size conditional on a change = mean log change / frequency
				local ref "price_ref_`hTag'"
				gen mSize_PRF_`hTag' = mDL_`ref' / mDP_`ref'
				gen mSizeU_PRF_`hTag' = mDLu_`ref' / mDPu_`ref'
				gen mSizeD_PRF_`hTag' = mDLd_`ref' / mDPd_`ref'
				
				save "$path_intermediate${sample}/PriceSeries/temp/`category'_h`h'", replace
			}	
			
			* The micro-level columns are no longer needed in the main data
			drop `microVars' totRev_year_h`h'
		}
		
		
		** Total revenue
		* TR is the revenue variable (renamed from dollar); the _v1_r versions are
		* restricted to observations with valid1 == 1 and retain == 1.
		rename dollar TR
		gen TR_v1_r = TR if valid1 == 1 & retain == 1
		* Log change relative to the preceding observation of the same id_nr; this is
		* the previous calendar month only when the id_nr series has no gaps.
		bysort id_nr (month): gen DL_TR = log(TR) - log(TR[_n-1])
		gen DL_TR_v1_r = DL_TR if valid1 == 1 & retain == 1

		* Calculate monthly price series (category-level)
		** Collapse
		* Means are weighted by totRev_year; the rawsum statistics are plain sums.
		* After this step the data hold one row per month.
		gen totRev_year_r = totRev_year if retain == 1
		gcollapse (mean) DL_* DLu_* DLd_* DP* (rawsum) totRev_year totRev_year_r TR* [iw = totRev_year], by(month) fast

		** Sales price indicators
		*** Inflations (pi_si = pi_raw - pi_ref/reg)
		gen DL_Sref_v1_r = DL_price_raw_v1_r - DL_price_ref_v1_r
		
		*** Frequencies (eps_si = eps_raw - eps_ref/reg)
		gen DP_Sref_v1_r = DP_price_raw_v1_r - DP_price_ref_v1_r
		
		*** Size (phi_si = pi_si / eps_si)
		gen size_Sref_v1_r = DL_Sref_v1_r / DP_Sref_v1_r 
		
		** Create indices
		* Running sum of each DL_ series over the rows in their current order
		* (presumably month order after the collapse by month - TODO confirm).
		* The index name is the series name with the DL_ part removed.
		foreach DL_price of varlist DL_* {
			local price = subinstr("`DL_price'", "DL_", "", .)
			gen idx_`price' = sum(DL_`price')
		}


		* Merge in micro measures
		* One temp file per horizon, written by the micro-measure step above.
		foreach h of global horizons {
			merge 1:1 month using "$path_intermediate${sample}/PriceSeries/temp/`category'_h`h'", nogen
		}
		
		* Save
		save "$path_intermediate${sample}/PriceSeries/`category'", replace
		
		
		* Generate stitch data
		* Builds, per category, a revenue-weighted mean log change of monthly revenue
		* for 2007m1 and 2008m1, using only id_nr series observed 12 times in at least
		* one of the two windows defined below. The result is saved as a small file
		* holding month, category and DL_TR_stitch.
		use "$path_output/completePriceData${sample}/`category'", clear
		gcollapse (sum) dollar, by(id_nr month) fast
		
		* Calculate yearly expenditure
		gen year = yofd(dofm(month))
		bysort id_nr year: egen totRev_year = total(dollar)
		
		* Adjust monthly expenditure for #of weeks in the month
		* Rescales to a standard month of 52/12 weeks.
		merge m:1 month using "$path_intermediate/NrOfWeeks_m", nogen keep(match master)
		replace dollar = dollar / nr_of_weeks * 52/12
		drop nr_of_weeks
		
		* See if present in the 12-month windows around 2007m1 and 2008m1
		* (2006m6-2007m5 and 2007m6-2008m5); obs07 and obs08 count the months
		* each id_nr is observed in the respective window.
		gen range07 = inrange(month, tm(2006m6), tm(2007m5))
		gen range08 = inrange(month, tm(2007m6), tm(2008m5))
		bysort id_nr (month): gegen obs07 = total(range07)
		bysort id_nr (month): gegen obs08 = total(range08)
		
		if ("${sample}" == "_sample") & ("`category'" == "saltsnck") {		// some weird issue with _sample leading to all empty dataset in saltsnck, does not affect actual data
			replace obs07 = 12		
			replace obs08 = 12		
		}
		* Keep only id_nr series complete in at least one window, and years 2005-2009.
		drop if max(obs07,obs08) < 12
		drop if ~inrange(year, 2005, 2009)
		
		* rangeOK: in 2007 (2008) the series must be complete in the 2007 (2008)
		* window; observations in all other years always qualify.
		gen rangeOK = (obs07 >= 12 & year == 2007)|(obs08 >= 12 & year == 2008)
		replace rangeOK = 1 if ~inrange(year, 2007, 2008)
		
		* Calculate log differences
		* Relative to the preceding observation of the same id_nr.
		bysort id_nr (month): gen DL_TR = log(dollar) - log(dollar[_n-1]) if rangeOK == 1

		
		* Revenue-weighted monthly mean of the log differences
		collapse (mean) DL_TR [iw=totRev_year], by(month)
		rename DL_TR DL_TR_stitch
			
		* Only the two stitch months are kept
		keep if month == tm(2007m1) | month == tm(2008m1)
		gen category = "`category'"
		save "$path_intermediate${sample}/Revenue Stitch (long)/`category'_prepped.dta", replace
	}
}

** Combine aggregate data over categories
if "$combineAggregateData" == "1" {
    * Start log
    cap log close combineAggregateData
    log using "$path_logs/combineAggregateData${sample}.log", append text name(combineAggregateData)
	* Output folders (cap: ignore the error when they already exist)
	cap mkdir "$path_output${sample}"
	cap mkdir "$path_output${sample}/PriceSeries"
	cap mkdir "$path_output${sample}/resetPricesInflation"
	
	* Clear dataset
	* Start from an empty dataset that only declares the category string variable
	clear
	gen category = ""

	* Append all files
	* Stacks the per-category monthly series into one long panel. Rows that were
	* just appended still have an empty category and are tagged with the current one.
	* Show the folder the files are actually read from
	di "$path_intermediate${sample}/PriceSeries"
	foreach catfile of local categories {
		* Strip a .dta extension in case the list holds file names
		local category = subinstr("`catfile'", ".dta", "", .)
		di `"append using "$path_intermediate${sample}/PriceSeries/`category'", nolabel"'
		append using "$path_intermediate${sample}/PriceSeries/`category'", nolabel
		replace category = "`category'" if missing(category)
	}
	
	* Sizes
	* Average size of a price change = mean log change / frequency of change
	gen sizeA_PRF_v1_r = DL_price_ref_v1_r / DP_price_ref_v1_r 
	label variable sizeA_PRF_v1_r "Size for all price changes"

	** Up/Down sizes
	gen sizeU_PRF_v1_r = DLu_price_ref_v1_r / DPu_price_ref_v1_r 
	gen sizeD_PRF_v1_r = DLd_price_ref_v1_r / DPd_price_ref_v1_r 
	label variable sizeU_PRF_v1_r "Size for upwards price changes"
	label variable sizeD_PRF_v1_r "Size for downwards price changes"
	
	** Create indices
	* Cumulative upward and downward log changes. The data are a stacked panel of
	* categories here, so the running sum has to restart within each category and
	* follow month order; a plain sum() would carry one category's total into the next.
	foreach DL_price of varlist DLu_* {
		local price = subinstr("`DL_price'", "DLu_", "", .)
		bysort category (month): gen idxU_`price' = sum(DLu_`price')
	}
	foreach DL_price of varlist DLd_* {
		local price = subinstr("`DL_price'", "DLd_", "", .)
		bysort category (month): gen idxD_`price' = sum(DLd_`price')
	}

	* Revenue series
	** Insert stitch data
	* Every file ending in _prepped.dta in the stitch folder is merged in. The first
	* merge adds DL_TR_stitch; with the update option later merges only fill values
	* that are still missing.
	local preppedStitchFiles : dir "$path_intermediate${sample}/Revenue Stitch (long)" files "*_prepped.dta"
	foreach file of local preppedStitchFiles {
		merge 1:1 month category using "$path_intermediate${sample}/Revenue Stitch (long)/`file'", update nogen
	}
	
	* Two variants of revenue growth per category:
	*   DoS = log difference of the category revenue totals (computed here)
	*   SoD = the DL_TR series already present in the appended category files
	bysort category (month): gen DL_TR_DoS_all = log(TR) - log(TR[_n-1])					// difference of revenue sums
	bysort category (month): gen DL_TR_DoS_v1_r = log(TR_v1_r) - log(TR_v1_r[_n-1])
	rename DL_TR DL_TR_SoD_all																// sum of revenue differences
	rename DL_TR_v1_r DL_TR_SoD_v1_r
	
	local DL_TR_vars "DL_TR_SoD_all DL_TR_SoD_v1_r DL_TR_DoS_all DL_TR_DoS_v1_r"

	* For each growth series keep an _orig copy and a _new copy in which the
	* months with a stitch value are overwritten, plus a cumulative index of both.
	foreach var of local DL_TR_vars {		
		gen `var'_new = `var'
		replace `var'_new = DL_TR_stitch if ~missing(DL_TR_stitch)			// Replace 2007m1 and 2008m1 by stitch
		bysort category (month): gen idx_`var'_new = sum(`var'_new)		// Gen index of new TR changes
		
		rename `var' `var'_orig
		bysort category (month): gen idx_`var'_orig = sum(`var'_orig)		// Gen index of original TR changes
	}
	* idx_TR and idx_TR_v1_r are superseded by the idx_ variables created in the loop above
	drop DL_TR_stitch idx_TR idx_TR_v1_r

	
	* Real expenditure
	* Nominal revenue growth minus raw-price inflation (DL_price_raw_v1_r is used
	* as the deflator for every revenue series), plus the cumulative index.
	foreach var of varlist DL_TR* {
		local varnodltr = subinstr("`var'", "DL_TR_", "", .)
		gen DL_RTR_`varnodltr' = `var' - DL_price_raw_v1_r
		bysort category (month): gen idx_RTR_`varnodltr' = sum(DL_RTR_`varnodltr')		// Gen index of real TR changes
	}
	
	* Generate aggregate
	** Aggregate category
	* Add one seed row for the pseudo-category aggregate, then let fillin create
	* a row for every category-month combination (so aggregate gets every month).
	insobs 1
	replace category = "aggregate" if missing(category)
	replace month = tm(2001m1) if missing(month)
	fillin category month
	drop _fillin
	
	** Weights
	* Share of each category in the month's total retained yearly revenue
	bysort month: egen double aggRev = total(totRev_year_r)
	gen double weight_yw = totRev_year_r/aggRev

	** Aggregate series
	* Weighted sum across categories within each month. egen total() counts missing
	* products as zero, so categories without a value simply do not contribute.
	foreach var of varlist DL* DP* size* mDL* mDP* {
		bysort month: egen agg_`var' = total(`var' * weight_yw)
	}
	* Shorten index names so that the index of DL_x is called idx_x
	rename *idx_DL* *idx*
	
	** Assign aggregate values to aggregate category
	sort category month
	
	foreach var of varlist DL* DP* size* mDL* mDP* {
		replace `var' = agg_`var' if category == "aggregate"
	}
	replace totRev_year_r = aggRev if category == "aggregate"
	replace weight_yw = 1 if category == "aggregate"
	
	* Rebuild the cumulative indices for the aggregate rows. Because of the if
	* qualifier the running sum covers the aggregate rows only, in the month order
	* set by the sort above.
	foreach var of varlist DL_* {
		local nodlvar = subinstr("`var'", "DL_", "", .)
		replace idx_`nodlvar' = sum(`var') if category == "aggregate"
	}
	foreach var of varlist DLu_* {
		local nodlvar = subinstr("`var'", "DLu_", "", .)
		replace idxU_`nodlvar' = sum(`var') if category == "aggregate"
	}
	foreach var of varlist DLd_* {
		local nodlvar = subinstr("`var'", "DLd_", "", .)
		replace idxD_`nodlvar' = sum(`var') if category == "aggregate"
	}

	* Removes the agg_ helpers and aggRev
	drop agg*
	
	** Remove zeros from aggregate
	* Zeros in the aggregate rows are set to missing for every variable except
	* month, category and weight_yw.
	ds month category weight_yw, not
	recode `r(varlist)' (0 = .) if category == "aggregate"
	
	** Seasonal adjustment
	* Each series is regressed, category by category, on twelve month-of-year
	* dummies without a constant. The adjusted series is the original value minus
	* the coefficient of its own calendar month plus the mean of the twelve coefficients.
	
	*** Month-of-year dummies month_1 ... month_12
	gen month_nr = month(dofm(month))
	tab month_nr, gen(month_)

	*** Expression pieces, assembled once and reused after every regression
	local coefTotal "_b[month_1]"
	local seasonalPart "-_b[month_1]*month_1"
	forvalues m = 2/12 {
		local coefTotal "`coefTotal'+_b[month_`m']"
		local seasonalPart "`seasonalPart'-_b[month_`m']*month_`m'"
	}

	*** Regress
	levelsof category, local(allCategories)
	_dots 0, title(Generating seasonally adjusted variables)
	local counter = 1
	foreach var of varlist mDL* mDP* DL* DP* size* {
		noisily _dots `counter++' 0
		qui gen `var'_SA= .
		qui foreach category of local allCategories {
			regress `var' month_1-month_12 if category == "`category'", noconst
			
			* Step 1: store the average coefficient
			replace `var'_SA = (`coefTotal')/12 if category == "`category'"
			
			* Step 2: original value + average coefficient - seasonal component
			replace `var'_SA = `var'+`var'_SA `seasonalPart' if category == "`category'"
		}
	}
	
	*** Clean up month_nr and the dummies
	drop month_*
	
	* Finalise
	** Trim series
	drop if month == tm(2001m1)			// No price changes possible in first month
	* Frequency variables and every reference-price variable are blanked in
	* 2001m2 and in the last two months of 2012.
	foreach var of varlist DP* *price_ref* {		// Also truncation bias possible in 2nd month (due to ref/reg method)
		replace `var' = . if inlist(month, tm(2001m2), tm(2012m11), tm(2012m12))
	}

	** Data management
	* Numeric panel id so the data can be declared as a category-by-month panel
	encode category, gen(categoryID)
	xtset categoryID month
	order categoryID, first
	order category, last
	
	** Add external data
	* Monthly external series are attached to every category row; rows without a
	* match are kept, months that exist only in the external files are not added.
	merge m:1 month using "$rootL/External Data/ext_CPI_FAH_SA", keep(match master) nogen
	merge m:1 month using "$rootL/External Data/ext_CPI_ALL_SA", keep(match master) nogen
	merge m:1 month using "$rootL/External Data/ext_CPI_FB_SA", keep(match master) nogen
	drop *CPI*DL *CPI*level
	
	* Rebase each external series by subtracting its value in the first row of the
	* data (presumably the earliest month after the merges - TODO confirm sort order).
	foreach var of varlist ext_* {
		local var1 = `var'[1]
		replace `var' = `var' - `var1'
	}
	
	* Cumulative indices of the seasonally adjusted raw and reference price inflation
	foreach var of varlist DL_price_raw_v1_r_SA DL_price_ref_v1_r_SA {
		local nodlvar = subinstr("`var'", "DL_", "", .)
		bysort category (month): gen idx_`nodlvar' = sum(`var')		// Gen index of SA series
	}	
	
	* Save
	save "$path_output${sample}/PriceSeries/aggregatePlusCategories", replace
	
	* Prepare reset price inflation
	* Step 1 writes one small file per category with the monthly reset price
	* inflation; step 2 stacks those files into a single dataset in memory.
	local resetOut "$path_output${sample}/resetPricesInflation"
	
	** Extract inflation data
	foreach category of local categories {
		* Load only the three variables that are needed
		use DL_P_reset1 DL_PRF_reset1 DL_P_month using "$path_output/resetPrices${sample}/resetPrices_`category'", clear

		* Only rows carrying a month hold the monthly reset price inflation value
		keep if ~missing(DL_P_month)
		
		* Clean: month identifier, series names without the trailing 1, category tag
		rename DL_P_month month
		gen category = "`category'"
		rename (DL_P_reset1 DL_PRF_reset1) (DL_P_reset DL_PRF_reset)
		
		* Save
		save "`resetOut'/resetPriceInflation_`category'", replace
	}
	
	** Combine to single file
	clear
	foreach category of local categories {
		append using "`resetOut'/resetPriceInflation_`category'", nolabel
	}
	
	** Attach weights
	* Category-month revenue weights come from the price series file; only
	* category-months present in both datasets are kept.
	merge 1:1 category month using "$path_output${sample}/PriceSeries/aggregatePlusCategories", keepusing(weight_yw) keep(match) nogen
	
	** Generate aggregate
	*** Aggregate category
	* Seed one aggregate row and let fillin create a row for every
	* category-month combination.
	insobs 1
	replace category = "aggregate" if missing(category)
	replace month = tm(2001m1) if missing(month)
	fillin category month
	drop _fillin
	
	*** Aggregate series
	* Weighted sum across categories within each month; egen total() counts
	* missing products as zero.
	foreach var of varlist DL* {
		bysort month: egen agg_`var' = total(`var' * weight_yw)
	}
	
	*** Assign aggregate values to aggregate category
	sort category month
	
	foreach var of varlist DL* {
		replace `var' = agg_`var' if category == "aggregate"
	}
	replace weight_yw = 1 if category == "aggregate"
	
	* The agg_ helpers are no longer needed
	drop agg*
	
	** Calculate seasonally adjusted values
	*** Trim series
	drop if month == tm(2001m1)			// No price changes possible in first month
	* Also truncation bias possible in 2nd month (due to ref/reg method)
	local edgeMonths "tm(2001m2), tm(2012m11), tm(2012m12)"
	foreach var of varlist DL_* {
		replace `var' = . if inlist(month, `edgeMonths')
	}
	
	*** Create month dummies
	gen month_nr = month(dofm(month))
	tab month_nr, gen(month_)

	*** Expression pieces, assembled once and reused after every regression
	local coefTotal "_b[month_1]"
	local seasonalPart "-_b[month_1]*month_1"
	forvalues m = 2/12 {
		local coefTotal "`coefTotal'+_b[month_`m']"
		local seasonalPart "`seasonalPart'-_b[month_`m']*month_`m'"
	}

	*** Regress
	* Per series and category: regress on the twelve month dummies without a
	* constant, then remove the own-month coefficient and add back the mean coefficient.
	levelsof category, local(allCategories)
	qui foreach var of varlist DL* {
		gen `var'_SA= .
		foreach category of local allCategories {
			regress `var' month_1-month_12 if category == "`category'", noconst
			
			* Step 1: store the average coefficient
			replace `var'_SA = (`coefTotal')/12 if category == "`category'"
			
			* Step 2: original value + average coefficient - seasonal component
			replace `var'_SA = `var'+`var'_SA `seasonalPart' if category == "`category'"
		}
	}
	
	*** Clean up month_nr and the dummies
	drop month_*
	
	** Generate index
	* Cumulative sum, by category in month order, of every DL_ series
	* (raw and seasonally adjusted)
	foreach var of varlist DL_* {
		local nodlvar = subinstr("`var'", "DL_", "", .)
		bysort category (month): gen idx_`nodlvar' = sum(`var')
	}
	
	** Save
	save "$path_output${sample}/resetPricesInflation/resetPriceInflation_allCategoriesAndAggregate", replace
	
	
	* Save for Local Projections
	* Reload the price series panel; clear is the documented option of use for
	* discarding the data currently in memory.
	use "$path_output${sample}/PriceSeries/aggregatePlusCategories", clear
	keep if category == "aggregate"
	drop if month <= tm(2000m12)
	drop *_all*
	
	* Add the aggregate reset price inflation series; unmatched rows of the
	* price series panel are kept.
	merge 1:1 category month using "$path_output${sample}/resetPricesInflation/resetPriceInflation_allCategoriesAndAggregate", nogen keep(match master)
	
	* Save
	save "$path_output${sample}/PriceSeries/aggregateData", replace		// This forms the base for the local projections
	
   
    * Close log
    log close combineAggregateData
}



log close log